#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : 批量写入数据(helpers.bulk).py
# @Author: dongguangwen
# @Date  : 2025-05-25 18:02
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import json
import time

# Settings for this import script.
ES_HOSTS = 'http://192.168.1.119:9200'
INDEX_NAME = 'persons'
DATA_FILE = './person_info_100w.json'
BATCH_SIZE = 300  # number of actions sent per helpers.bulk() call

# Field names copied into each sub-object of the indexed document.
SIMPLE_PROFILE_KEYS = (
    'username', 'name', 'sex', 'address', 'mail', 'birthdate',
)
PROFILE_KEYS = (
    'job', 'company', 'ssn', 'residence', 'blood_group', 'website',
    'username', 'name', 'sex', 'address', 'mail', 'birthdate',
)
LOCATION_KEYS = (
    'country', 'province', 'city', 'street_name', 'street_address',
    'address', 'postcode', 'gps',
)
NET_INFO_KEYS = (
    'ipv4', 'ipv6', 'uri', 'url', 'img_url', 'domain', 'user_agent',
)


def pick(mapping, keys):
    """Return ``{key: mapping.get(key)}`` for every key in *keys*.

    A missing (``None``) or empty *mapping* is treated as an empty dict, so
    every value in the result is ``None`` instead of raising
    ``AttributeError``.
    """
    mapping = mapping or {}
    return {key: mapping.get(key) for key in keys}


def build_action(record: dict, index: str = INDEX_NAME) -> dict:
    """Build one bulk action dict (``_index`` / ``_id`` / ``_source``).

    Args:
        record: One parsed JSON object from the input file.
        index: Value stored under ``_index``.

    Returns:
        A dict whose ``_id`` is ``record['id']`` and whose ``_source`` holds
        the top-level fields plus four sub-objects: ``simple_profile`` and
        ``profile`` are copied from the record's sub-objects of the same
        name, while ``location`` and ``net_info`` are assembled from
        top-level keys of the record. Any absent field becomes ``None``.
    """
    return {
        "_index": index,
        "_id": record.get('id'),
        "_source": {
            "id": record.get('id'),
            "name": record.get('name'),
            "sex": record.get('sex'),
            "age": record.get('age'),
            "email": record.get('email'),
            "simple_profile": pick(record.get('simple_profile'),
                                   SIMPLE_PROFILE_KEYS),
            "profile": pick(record.get('profile'), PROFILE_KEYS),
            "location": pick(record, LOCATION_KEYS),
            "net_info": pick(record, NET_INFO_KEYS),
            "character": record.get('character'),
            "long_text": record.get('long_text'),
            "time": record.get('time'),
        },
    }


def parse_records(lines):
    """Yield one parsed JSON value per non-blank string in *lines*.

    Blank or whitespace-only lines are skipped so that a stray empty line
    in the input does not abort the import.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    for raw in lines:
        raw = raw.strip()
        if not raw:
            continue
        yield json.loads(raw)


def main():
    """Read DATA_FILE line by line and bulk-index it in BATCH_SIZE batches.

    Prints the id of the last record of every full batch as a progress
    indicator, and the total elapsed time (seconds) at the end.
    """
    es = Elasticsearch(hosts=ES_HOSTS)

    start = time.time()
    batch = []
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        for record in parse_records(f):
            batch.append(build_action(record))
            if len(batch) >= BATCH_SIZE:
                print(record.get('id'))
                helpers.bulk(es, batch)
                batch = []
    # Flush the final, partially filled batch.
    if batch:
        helpers.bulk(es, batch)

    end = time.time()
    print('花费时间：', end - start)


if __name__ == '__main__':
    main()

"""
花费时间： 656.2222752571106
"""